* ==========================================================
* --------- ANES 2000 
* ==========================================================

use "${dir_raw}/anes2000ts.dta", clear

* ---------- respondent characteristics
* Respondent identifier, age and sex are copied from the raw items.
gen id = V000001

* NOTE(review): every nonmissing raw value is kept as-is; confirm V000908
* has no numeric "NA" code that should be turned into missing.
gen r_age = V000908 if V000908 < .
gen r_sex = V001029
	* 1 when V001029 == 2, 0 otherwise; missing when V001029 is missing
	gen r_female = V001029 == 2 if V001029 < .

* Race code: 1 and 2 are taken from V000066 directly, V000066 codes
* strictly between 2 and 8 are pooled into 3. Cases still missing fall
* back on V001030 (1 -> 1, 2 -> 2, 0 -> 4 = not specified : phone-mode).
gen r_race = cond(V000066 == 1, 1, cond(V000066 == 2, 2, cond(V000066 > 2 & V000066 < 8, 3, .)))
	replace r_race = cond(V001030 == 1, 1, cond(V001030 == 2, 2, cond(V001030 == 0, 4, .))) if missing(r_race)

	* one 0/1 indicator per race code (1 white, 2 black, 3 others, 4 n/a)
	local code = 0
	foreach grp in white black others racena {
		local ++code
		gen r_`grp' = (r_race == `code') if r_race < .
	}

* Marital status; NA (0), DK (8) and RF (9) are set to missing.
gen r_marstat = V000909 if !inlist(V000909, 0, 8, 9)
*                          0. NA |          5        0.28        0.28
*                     1. MARRIED |        935       51.74       52.02
*                     2. WIDOWED |        168        9.30       61.32
*                    3. DIVORCED |        238       13.17       74.49
*                   4. SEPARATED |         55        3.04       77.53
*               5. NEVER MARRIED |        348       19.26       96.79
*6. PARTNERED, NOT MARRIED [VOL] |         49        2.71       99.50
*                          8. DK |          1        0.06       99.56
*                          9. RF |          8        0.44      100.00
*

	* indicators for "married" (code 1) and "never married" (code 5)
	gen r_married = (r_marstat == 1) if r_marstat < .
	gen r_single  = (r_marstat == 5) if r_marstat < .

* Household composition. r_childs is forced to 0 whenever V001023 == 5;
* otherwise it is V001024 when that value is below 90.
* NOTE(review): presumably V001023 == 5 means "no children" - confirm.
gen r_adults = V000039
gen r_childs = cond(V001023 == 5, 0, cond(V001024 < 90, V001024, .))

* Education: degree category (code 9 dropped) mapped to years of schooling.
* NOTE(review): a 0 code in V000913, if present, passes through unchanged
* into both r_degree and r_educ - confirm against the codebook.
gen r_degree = V000913 if V000913 != 9
	recode r_degree (1=8) (2=11) (3=12) (4=13) (5=14) (6=16) (7=18), gen(r_educ)

*tab V000879
*            1. EVERY WEEK  |        479       26.51       57.39
*      2. ALMOST EVERY WEEK |        205       11.34       68.73
*  3. ONCE OR TWICE A MONTH |        270       14.94       83.67
*     4. A FEW TIMES A YEAR |        282       15.61       99.28
*                 5. NEVER  |          8        0.44       99.72
*                     8. DK |          1        0.06       99.78
*                     9. RF |          4        0.22      100.00

* Religious attendance, reversed so that V000879 codes 1..5
* (every week .. never) become 4..0; V000879 codes 0, 8 and 9 stay missing.
* Respondents with V000877 == 5 are set to 0.
gen r_attendance = 5-V000879 if V000879 >0 & V000879 <8
	replace r_attendance = 0 if V000877 == 5

* Work status and 0/1 indicators derived from it.
* The indicators use (nonmissing=0) rather than (else=0): recode's "else"
* rule also matches missing values, which would code respondents with an
* unknown work status as 0 on every indicator instead of leaving them missing.
* NOTE(review): any positive NA/DK code in V000920 (e.g. 9) is still treated
* as a valid "0" category - confirm against the codebook.
gen r_wrkstat = V000920 if V000920 > 0 
	recode r_wrkstat (1=1) (nonmissing=0), gen(r_working)
	recode r_wrkstat (2 4=1) (nonmissing=0), gen(r_unemployed)
	recode r_wrkstat (5=1) (nonmissing=0), gen(r_retired)
	recode r_wrkstat (6 7 8=1) (nonmissing=0), gen(r_otherworks)
*lookfor K1
* Party identification summary (code 9 dropped). r_partyid7 shifts the
* summary up by one; shifted values 8 and 9 are set to missing.
gen r_partyid = V000523 if V000523 != 9 
gen r_partyid7 = r_partyid +1
	recode r_partyid7 (8=.) (9=.)

*gen r_partyid_c = .
*
*replace r_partyid_c = 0 if V000519 == 1 & V000520 ==1 
*replace r_partyid_c = 1 if V000519 == 1 & (V000520 ==5| V000520 ==8|V000520 ==9)
*replace r_partyid_c = 2 if (V000519 == 3 | V000519 == 4 | V000519 == 5 ) & V000522 == 5
*replace r_partyid_c = 3 if (V000519 == 3 | V000519 == 4 | V000519 == 5) & (V000522 == 3)
*replace r_partyid_c = 4 if (V000519 == 3 | V000519 == 4 | V000519 == 5 ) & V000522 == 1
*replace r_partyid_c = 5 if V000519 == 2 & (V000521 ==5| V000521 ==8|V000521 ==9)
*replace r_partyid_c = 6 if V000519 == 2 & V000521 ==1 
*replace r_partyid_c = 7 if V000519 == 9 | (V000519 == 4 & (V000521==8 | V000522==8))
*
*replace r_partyid_c = 9 if V000519 == 8 
*tab V000523 r_partyid_c,m

*tab V000519 // K1
*tab V000520 // K1a
*tab V000521 // K1b
*tab V000522 // K1c
*tab V000523 // summary 

* Ideology: keep scale points 1-7; 0 (NA), 8 (DK) and 9 (refused) are missing.
gen r_ideo = V000446 if inrange(V000446, 1, 7)

/* in ANES 
----------------------------------------+-----------------------------------
                                  0. NA |          1        0.06        0.06
1. SCALE: 1 / BRANCHING: strong liberal |         82        4.54        4.59
2. SCALE: 2 / BRANCHING: not strong lib |        158        8.74       13.34
3. SCALE: 3. had to choose liberal / BR |        351       19.42       32.76
4. SCALE: 4. had to choose moderate/ BR |        109        6.03       38.79
5. SCALE: 5. had to choose conserv/ BRA |        528       29.22       68.01
6. SCALE: 6 / BRANCHING: not strong con |        250       13.84       81.85
7. SCALE: 7 / BRANCHING: strong conserv |        145        8.02       89.87
                                  8. DK |         18        1.00       90.87
                 9. R refuses to choose |        165        9.13      100.00
----------------------------------------+-----------------------------------
                                  Total |      1,807      100.00
*/ 

/* in GSS 
            tabulation:  Freq.   Numeric  Label
                         1,424         1  extremely liberal
                         5,886         2  liberal
                         6,444         3  slightly liberal
                        19,483         4  moderate
                         8,025         5  slghtly conservative
                         7,450         6  conservative
                         1,613         7  extrmly conservative
                         2,166        .d  dk
                         6,777        .i  
                           331        .n  na

*/ 


* Political interest: raw codes 1/3/5 are flipped to 3/2/1; codes <= 0 are
* missing. Any other positive code passes through unchanged.
* NOTE(review): confirm V001201 has no positive DK/RF codes.
gen r_pol_interest = cond(V001201 == 1, 3, cond(V001201 == 3, 2, cond(V001201 == 5, 1, V001201))) if V001201 > 0

* Talks politics at all (1 when V001204 == 1), defined for V001204 > 0.
gen r_talk_politics = (V001204 == 1) if V001204 > 0
* Talk frequency: 98 (DK) is missing, 96 (none) is 0, and respondents who
* do not talk politics at all are set to 0 as well.
gen r_talk_freq = V001205 if V001205 > 0 & V001205 != 98
	replace r_talk_freq = 0 if V001205 == 96 | r_talk_politics == 0

* A3a. How often does R discuss politics |      Freq.     Percent        Cum.
*----------------------------------------+-----------------------------------
*00. NA. INAP, 5, 8, 9 in A3. no Post in |        549       30.38       30.38
*                                      1 |         63        3.49       33.87
*                                      2 |        149        8.25       42.11
*                                      3 |        145        8.02       50.14
*                                      4 |         97        5.37       55.51
*                                      5 |        110        6.09       61.59
*                                      6 |         23        1.27       62.87
*                                      7 |        648       35.86       98.73
*                     96. NONE (NO DAYS) |         21        1.16       99.89
*                                 98. DK |          2        0.11      100.00
*----------------------------------------+-----------------------------------
*                                  Total |      1,807      100.00
*


*des V001699-V001734

* ------ network size
* Number of the four listed items (V001699-V001702) that equal 1.
* The count is set to missing when V001699 == 0.
egen n_size = anycount(V001699 V001700 V001701 V001702), values(1)
	replace n_size = . if V001699 == 0

*codebook V001709 V001717 V001725 V001733
*codebook V001710 V001718 V001726 V001734

* ------ network voting behaviors
* One vote item per alter (a1..a4); codes <= 0 are left missing.
local i = 0
foreach src in V001710 V001718 V001726 V001734 {
	local ++i
	gen a`i'_vote = `src' if `src' > 0
}
*                         1,480         0  00. NA. INAP, 5, 8, 9, 0 in Z1
*                           105         1  1. AL GORE
*                           147         3  3. GEORGE W BUSH
*                             6         5  5. SOME OTHER CANDIDATE
*                                          (SPECIFY)
*                            26         7  7. DIDN'T VOTE
*                             6         8  8. INELIGIBLE TO VOTE [VOL]
*                            36        98  98. DK - DON'T PROBE
*                             1        99  99. RF - DON'T PROBE
*

* Flag alters whose vote is DK (98) or RF (99), then blank those votes out
* so that a?_vote only holds substantive answers.
forvalues i = 1/4 {
	gen a`i'_vote_dk = inlist(a`i'_vote, 98, 99) if a`i'_vote < .
	replace a`i'_vote = . if a`i'_vote_dk == 1
}

* Count and share of alters with an unknown vote.
* NOTE(review): the count is blanked for n_size == 0, whereas the other
* network counts in this file are blanked for missing(n_size) - confirm
* which rule is intended.
egen n_vote_dk = rowtotal(a?_vote_dk)
	replace n_vote_dk = . if n_size == 0
egen p_vote_dk = rowmean(a?_vote_dk)


* ------- R's vote
tab V001241 V001248
tab V001248 V001249
tab V001249

* Turnout: 1 when V001248 == 1.
* NOTE(review): the sample filter is on V001241 while the outcome is
* V001248 - confirm this is the intended pairing.
gen r_vote = (V001248 == 1) if V001241 > 0
* Presidential vote: 1 = Bush (3), .5 = minor candidates (2, 4, 5, 6),
* 0 = Gore (1) or self (7); NA/DK/RF (0, 8, 9) are missing.
gen r_vote_bush = (V001249 == 3) if V001249 > 0 & V001249 < 8
	replace r_vote_bush = .5 if inlist(V001249, 2, 4, 5, 6)
*          C6. R vote cast for President |      Freq.     Percent        Cum.
*----------------------------------------+-----------------------------------
*          0. NA. INAP, 5, 8, 9, 0 in C5 |        629       34.81       34.81
*                            1. AL GORE  |        590       32.65       67.46
*2. HOWARD PHILLIPS-CONSTITUTION PARTY C |          1        0.06       67.52
*                      3. GEORGE W. BUSH |        530       29.33       96.85
*   4. HARRY BROWN-LIBERTARIAN CANDIDATE |          4        0.22       97.07
*                       5. PAT BUCHANAN  |          3        0.17       97.23
*                         6. RALPH NADER |         33        1.83       99.06
*           7. R REPORTS VOTING FOR SELF |          1        0.06       99.11
*                                 8. DK  |          3        0.17       99.28
*                                 9. RF  |         13        0.72      100.00
*----------------------------------------+-----------------------------------
*                                  Total |      1,807      100.00

* non-voters
codebook V001276-V001286

* Had a preference (1 when V001276 == 1); codes <= 0 and >= 8 are missing.
gen r_nonvote_pref = (V001276 == 1) if V001276 > 0 & V001276 < 8

tab V001277
* Preferred candidate: 1 when V001277 == 3, .5 for codes 5, 6, 7,
* 0 for the remaining codes between 1 and 7.
* NOTE(review): the .5 codes here (5, 6, 7) differ from those used for the
* actual vote item (2, 4, 5, 6) - confirm against the V001277 codebook.
gen r_nonvote_bush = (V001277 == 3) if V001277 > 0 & V001277 < 8
	replace r_nonvote_bush = .5 if inlist(V001277, 5, 6, 7)

* Combined measure: -1 for respondents with r_vote == 0; otherwise the
* stated preference when available, else the actual vote.
gen r_bush = cond(r_vote == 0, -1, cond(r_nonvote_bush < ., r_nonvote_bush, r_vote_bush))

* Alter vote on the same scale as r_bush:
* 1 = Bush (3), .5 = some other candidate (5), -1 = did not vote (7) or
* ineligible (8), 0 = any other code below 90.
forvalues i = 1/4 {
	gen a`i'_vote_bush = (a`i'_vote == 3) if a`i'_vote < 90
	replace a`i'_vote_bush = .5 if a`i'_vote == 5
	replace a`i'_vote_bush = -1 if inlist(a`i'_vote, 7, 8)
}

* when measuring political homophily -- how to consider "no vote"?
* the "_a" versions below exclude non-voters (the -1 category becomes missing)
gen r_bush_a = r_bush if r_bush >= 0 & r_bush < .
forvalues i = 1/4 {
	gen a`i'_vote_bush_a = a`i'_vote_bush if a`i'_vote_bush >= 0 & a`i'_vote_bush < .
}


* voting homophily
* a?_vote_same is 1 when alter and respondent have the same value on the
* Bush scale (non-voting counts as a category), defined only when both
* values are known.
forvalues i = 1/4 {
	gen a`i'_vote_same = (a`i'_vote_bush == r_bush) if !missing(a`i'_vote_bush, r_bush)
}

* number / share of alters matching the respondent
egen n_same_vote = rowtotal(a?_vote_same)
	replace n_same_vote = . if missing(n_size)
egen p_same_vote = rowmean(a?_vote_same)

* same comparison on the "_a" scale, i.e. with non-voters excluded
forvalues i = 1/4 {
	gen a`i'_vote_same_a = (a`i'_vote_bush_a == r_bush_a) if !missing(a`i'_vote_bush_a, r_bush_a)
}

egen n_same_vote_a = rowtotal(a?_vote_same_a)
	replace n_same_vote_a = . if missing(n_size)
egen p_same_vote_a = rowmean(a?_vote_same_a)


* Collapse each alter's vote choice, in place, into a turnout indicator:
* codes 1-5 (voted for a candidate) -> 1, codes 7-8 (did not vote or
* ineligible) -> 0. This runs after the Bush-scale variables were built.
foreach v of varlist a1_vote a2_vote a3_vote a4_vote {
	recode `v' (1/5 = 1) (7/8 = 0)
}

* these two measures show high correlation (=93.94)
pwcorr p_same_vote p_same_vote_a
tab r_bush_a a1_vote_bush_a 


* relationship type -----------
* Raw relationship item for each alter.
local i = 0
foreach src in V001703 V001711 V001719 V001727 {
	local ++i
	gen a`i'_relative = `src'
}

* a?_spouse: 1 for code 1, 0 for codes 3 and 5; 0/8/9 are missing.
* NOTE(review): presumably 1 = spouse, 3 = other relative, 5 = not a
* relative - confirm against the codebook.
forvalues i = 1/4 {
	recode a`i'_relative (0 8 9 = .) (1 = 1) (3 5 = 0), gen(a`i'_spouse)
}

* a?_relative is then collapsed in place: codes 1 and 3 -> 1, code 5 -> 0.
forvalues i = 1/4 {
	recode a`i'_relative (0 8 9 = .) (1 3 = 1) (5 = 0)
}

* number / share of alters coded 1 on a?_relative
egen n_relative = rowtotal(a?_relative)
	replace n_relative = . if missing(n_size)
egen p_relative = rowmean(a?_relative)

*codebook V001704-V001709
* Four 0/1 alter attributes (male, coworker, same church, neighbor).
* Each is 1 when the raw item equals 1 and is defined only for raw codes
* strictly between 0 and 8. The raw items are numbered regularly: attribute
* k of alter i is V00(1703 + k + 8*(i-1)), e.g. V001704 for a1_male and
* V001731 for a4_neighbor.
local suffixes male cowork same_church neighbor
local keywords male coworker church neighbor
forvalues k = 1/4 {
	local sfx : word `k' of `suffixes'
	local key : word `k' of `keywords'
	lookfor `key'
	forvalues i = 1/4 {
		local num = 1703 + `k' + 8 * (`i' - 1)
		gen a`i'_`sfx' = (V00`num' == 1) if V00`num' > 0 & V00`num' < 8
	}
}

* An alter coded as a relative is never counted as a coworker or neighbor.
forvalues i = 1/4 {
	foreach role in cowork neighbor {
		replace a`i'_`role' = 0 if a`i'_relative == 1
	}
}


lookfor discuss 
* Frequency of discussion with each alter, kept for raw codes strictly
* between 0 and 8 and reversed so that 7 (never) -> 0, 5 (rarely) -> 1,
* 3 (sometimes) -> 2, 1 (often) -> 3.
local i = 0
foreach src in V001708 V001716 V001724 V001732 {
	local ++i
	gen a`i'_discuss = `src' if `src' > 0 & `src' < 8
}

recode a1_discuss a2_discuss a3_discuss a4_discuss (7 = 0) (5 = 1) (3 = 2) (1 = 3)


*                          655         0  0. NA. INAP, 5, 8, 9, 0 in Z1
*                           354         1  1. OFTEN
*                           593         3  3. SOMETIMES
*                           195         5  5. RARELY
*                             6         7  7. NEVER
*                             2         8  8. DK
*                             2         9  9. RF
lookfor know 
* Raw political-knowledge rating for each alter.
local i = 0
foreach src in V001709 V001717 V001725 V001733 {
	local ++i
	gen a`i'_knowpolitics = `src'
}

* Respondent's own rating, reversed as 5 - V001745 for positive codes.
* NOTE(review): positive DK/RF codes in V001745 (e.g. 8 or 9), if any,
* would come out negative here - confirm against the codebook.
gen r_knowpolitics = 5 - V001745 if V001745 > 0 

* Alter ratings rescaled: 1 -> 2, 3 -> 1, 5 -> 0; codes 0/8/9 are missing.
recode a1_knowpolitics a2_knowpolitics a3_knowpolitics a4_knowpolitics (0 8 9 = .) (1 = 2) (3 = 1) (5 = 0)



* Build the alter-level (dyad) file: one row per respondent x alter slot,
* keeping only slots with a known relationship code. The respondent-level
* data in memory are restored afterwards.
* NOTE(review): a@_spouse is not among the reshape stubs, so a1_spouse ..
* a4_spouse stay as wide columns on every dyad row - confirm whether a
* long a_spouse variable was intended.
preserve
	keep id a?_*
	reshape long a@_relative a@_vote a@_vote_dk a@_vote_bush a@_vote_bush_a ///
		a@_vote_same a@_vote_same_a a@_male a@_cowork a@_neighbor ///
		a@_same_church a@_discuss a@_knowpolitics, i(id) j(order)
	drop if missing(a_relative)

	saveold "${dir_processed}/dyad_2000.dta", replace version(12)
restore

* ---------------- interview-related variables
* interviewer id, weights and interview mode
gen intid = V000070

gen wt_sample = V000002
gen wt_post = V000002a 
* 1 when V000126 == 5, defined for codes strictly between 0 and 7
gen i_phonemode = (V000126 == 5) if V000126 > 0 & V000126 < 7

* Sampling code split into two parts: the last digit goes to vpsu and the
* leading digit(s) go to vstrat. Only 2- and 3-character codes are split;
* any other length leaves both parts missing.
gen sampcode = V000097
gen s_sampcode = string(sampcode)
gen size_samp = length(s_sampcode)

gen vstrat = cond(inlist(size_samp, 2, 3), substr(s_sampcode, 1, size_samp - 1), "")
gen vpsu = cond(inlist(size_samp, 2, 3), substr(s_sampcode, size_samp, 1), "")

destring vstrat vpsu, replace 

* ----------- interview date information
* V000130 is a string whose first two characters are read as the month and
* whose characters 3-4 are read as the day; "0000" means no date.
*gen time = real(M000008)
*gen time = V000008
gen time = V000130
replace time = "" if time == "0000"

* month / day pieces (empty when time is empty)
gen month = substr(time, 1, 2)
*destring month, replace 

gen day = substr(time, 3, 2)
*destring day, replace 

gen year = 2000 

* Assemble "DD/MM/2000" and convert to a Stata daily date; a blank or
* invalid date yields missing.
gen svydate = day+"/"+month+"/"+string(year)
gen svydate2 = date(svydate, "DMY")

* Apply the display format before tabulating, so the table lists calendar
* dates rather than raw day counts.
format svydate2 %td
tab svydate2

*gen fips_county = V000087 
*gen state = V000079

tab V000080
* Temporary state code (96 and 99 set to missing), used only to count the
* respondents per state; sorting by it reorders the data.
* NOTE(review): only 96/99 are dropped here, while state_no below drops
* every code from 95 up - confirm the two rules should differ.
gen state = V000080 if !inlist(V000080, 96, 99)

bysort state: egen state_n = count(id)

* individual levels ---
* strength of partisanship / ideology: distance from the scale midpoint 4
gen r_party_int = abs(r_partyid7 - 4)
gen r_ideo_int = abs(r_ideo - 4)

* replace the temporary code by the numeric state code (95 and above missing)
drop state 
gen state_no = V000080 if V000080 < 95 | missing(V000080)
* Two-letter postal abbreviation for each numeric state code. The lookup
* list holds "code abbreviation" pairs and follows the FIPS state numbering
* (e.g. 6 = CA, 36 = NY, 48 = TX).
* Code 11 is the District of Columbia and is labelled "DC"; labelling it
* "WA" would merge those respondents with Washington state (code 53).
* NOTE(review): codes 2 (AK) and 15 (HI) are not in the list, so such
* cases, if any, keep an empty state_cl.
gen state_cl = ""

local state_codes  1 AL  4 AZ  5 AR  6 CA  8 CO  9 CT 10 DE 11 DC 12 FL 13 GA ///
                  16 ID 17 IL 18 IN 19 IA 20 KS 21 KY 22 LA 23 ME 24 MD 25 MA ///
                  26 MI 27 MN 28 MS 29 MO 30 MT 31 NE 32 NV 33 NH 34 NJ 35 NM ///
                  36 NY 37 NC 38 ND 39 OH 40 OK 41 OR 42 PA 44 RI 45 SC 46 SD ///
                  47 TN 48 TX 49 UT 50 VT 51 VA 53 WA 54 WV 55 WI 56 WY

* walk through the list two tokens at a time: (code, abbreviation)
local n_tokens : word count `state_codes'
forvalues j = 1(2)`n_tokens' {
	local k = `j' + 1
	local code : word `j' of `state_codes'
	local abbr : word `k' of `state_codes'
	replace state_cl = "`abbr'" if state_no == `code'
}

* final string state identifier
gen state = state_cl 


* Tag the source survey, keep the harmonised variables and write the
* respondent-level file in Stata 12 format.
gen dataset = "ANES"
keep id intid dataset sampcode vpsu vstrat wt_sample wt_post i_* year svydate2 ///
	n_size r_* state* a* n_* p_* 

saveold "${dir_processed}/anes_2000.dta", replace version(12)
